home *** CD-ROM | disk | FTP | other *** search
/ Chip 2007 January, February, March & April / Chip-Cover-CD-2007-02.iso / Pakiet bezpieczenstwa / mini Pentoo LiveCD 2006.1 / mpentoo-2006.1.iso / livecd.squashfs / usr / lib / python2.4 / email / FeedParser.pyo (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2005-10-18  |  10.3 KB  |  442 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyo (Python 2.4)
  3.  
  4. """FeedParser - An email feed parser.
  5.  
  6. The feed parser implements an interface for incrementally parsing an email
  7. message, line by line.  This has advantages for certain applications, such as
  8. those reading email messages off a socket.
  9.  
  10. FeedParser.feed() is the primary interface for pushing new data into the
  11. parser.  It returns when there's nothing more it can do with the available
  12. data.  When you have no more data to push into the parser, call .close().
  13. This completes the parsing and returns the root message object.
  14.  
  15. The other advantage of this parser is that it will never throw a parsing
  16. exception.  Instead, when it finds something unexpected, it adds a 'defect' to
  17. the current message.  Defects are just instances that live on the message
  18. object's .defects attribute.
  19. """
  20. import re
  21. from email import Errors
  22. from email import Message
# Line-ending patterns.  Each one recognises the three conventions CRLF, bare
# CR and bare LF; they differ only in grouping and anchoring.

# One line ending.  Used with .match(), so it only hits when the line *starts*
# with a line ending, i.e. the line is blank.
NLCRE = re.compile('\r\n|\r|\n')
# Same alternatives inside a capturing group; used with .match() to find a
# line ending at the beginning of a string.
NLCRE_bol = re.compile('(\r\n|\r|\n)')
# A line ending anchored at the end of the string; used with .search() to find
# the trailing line ending that is then sliced off.
NLCRE_eol = re.compile('(\r\n|\r|\n)$')
# Capturing variant used with .split(): because of the group, the separators
# are kept in the result list alongside the text between them.
NLCRE_crack = re.compile('(\r\n|\r|\n)')
# A line belongs to the header block when it starts with 'From ', or with two
# or more characters from octal 041-071 / 073-176 (printable ASCII excluding
# space and the colon, octal 072) followed by ':', or with a tab or space.
headerRE = re.compile('^(From |[\\041-\\071\\073-\\176]{2,}:|[\\t ])')
EMPTYSTRING = ''
NL = '\n'
# Unique sentinel, always compared by identity ('is'), that means "no complete
# line is available yet and the input has not been closed".
NeedMoreData = object()
  31.  
  32. class BufferedSubFile(object):
  33.     '''A file-ish object that can have new data loaded into it.
  34.  
  35.     You can also push and pop line-matching predicates onto a stack.  When the
  36.     current predicate matches the current line, a false EOF response
  37.     (i.e. empty string) is returned instead.  This lets the parser adhere to a
  38.     simple abstraction -- it parses until EOF closes the current message.
  39.     '''
  40.     
  41.     def __init__(self):
  42.         self._partial = ''
  43.         self._lines = []
  44.         self._eofstack = []
  45.         self._closed = False
  46.  
  47.     
  48.     def push_eof_matcher(self, pred):
  49.         self._eofstack.append(pred)
  50.  
  51.     
  52.     def pop_eof_matcher(self):
  53.         return self._eofstack.pop()
  54.  
  55.     
  56.     def close(self):
  57.         self._lines.append(self._partial)
  58.         self._partial = ''
  59.         self._closed = True
  60.  
  61.     
  62.     def readline(self):
  63.         if not self._lines:
  64.             if self._closed:
  65.                 return ''
  66.             
  67.             return NeedMoreData
  68.         
  69.         line = self._lines.pop()
  70.         for ateof in self._eofstack[::-1]:
  71.             if ateof(line):
  72.                 self._lines.append(line)
  73.                 return ''
  74.                 continue
  75.         
  76.         return line
  77.  
  78.     
  79.     def unreadline(self, line):
  80.         self._lines.append(line)
  81.  
  82.     
  83.     def push(self, data):
  84.         '''Push some new data into this object.'''
  85.         data = self._partial + data
  86.         self._partial = ''
  87.         parts = NLCRE_crack.split(data)
  88.         self._partial = parts.pop()
  89.         lines = []
  90.         for i in range(len(parts) // 2):
  91.             lines.append(parts[i * 2] + parts[i * 2 + 1])
  92.         
  93.         self.pushlines(lines)
  94.  
  95.     
  96.     def pushlines(self, lines):
  97.         self._lines[:0] = lines[::-1]
  98.  
  99.     
  100.     def is_closed(self):
  101.         return self._closed
  102.  
  103.     
  104.     def __iter__(self):
  105.         return self
  106.  
  107.     
  108.     def next(self):
  109.         line = self.readline()
  110.         if line == '':
  111.             raise StopIteration
  112.         
  113.         return line
  114.  
  115.  
  116.  
  117. class FeedParser:
  118.     '''A feed-style parser of email.'''
  119.     
  120.     def __init__(self, _factory = Message.Message):
  121.         '''_factory is called with no arguments to create a new message obj'''
  122.         self._factory = _factory
  123.         self._input = BufferedSubFile()
  124.         self._msgstack = []
  125.         self._parse = self._parsegen().next
  126.         self._cur = None
  127.         self._last = None
  128.         self._headersonly = False
  129.  
  130.     
  131.     def _set_headersonly(self):
  132.         self._headersonly = True
  133.  
  134.     
  135.     def feed(self, data):
  136.         '''Push more data into the parser.'''
  137.         self._input.push(data)
  138.         self._call_parse()
  139.  
  140.     
  141.     def _call_parse(self):
  142.         
  143.         try:
  144.             self._parse()
  145.         except StopIteration:
  146.             pass
  147.  
  148.  
  149.     
  150.     def close(self):
  151.         '''Parse all remaining data and return the root message object.'''
  152.         self._input.close()
  153.         self._call_parse()
  154.         root = self._pop_message()
  155.         if root.get_content_maintype() == 'multipart' and not root.is_multipart():
  156.             root.defects.append(Errors.MultipartInvariantViolationDefect())
  157.         
  158.         return root
  159.  
  160.     
  161.     def _new_message(self):
  162.         msg = self._factory()
  163.         if self._cur and self._cur.get_content_type() == 'multipart/digest':
  164.             msg.set_default_type('message/rfc822')
  165.         
  166.         if self._msgstack:
  167.             self._msgstack[-1].attach(msg)
  168.         
  169.         self._msgstack.append(msg)
  170.         self._cur = msg
  171.         self._last = msg
  172.  
  173.     
  174.     def _pop_message(self):
  175.         retval = self._msgstack.pop()
  176.         if self._msgstack:
  177.             self._cur = self._msgstack[-1]
  178.         else:
  179.             self._cur = None
  180.         return retval
  181.  
  182.     
  183.     def _parsegen(self):
  184.         self._new_message()
  185.         headers = []
  186.         for line in self._input:
  187.             if line is NeedMoreData:
  188.                 yield NeedMoreData
  189.                 continue
  190.             
  191.             if not headerRE.match(line):
  192.                 if not NLCRE.match(line):
  193.                     self._input.unreadline(line)
  194.                 
  195.                 break
  196.             
  197.             headers.append(line)
  198.         
  199.         self._parse_headers(headers)
  200.         if self._headersonly:
  201.             lines = []
  202.             while True:
  203.                 line = self._input.readline()
  204.                 if line is NeedMoreData:
  205.                     yield NeedMoreData
  206.                     continue
  207.                 
  208.                 if line == '':
  209.                     break
  210.                 
  211.                 lines.append(line)
  212.             self._cur.set_payload(EMPTYSTRING.join(lines))
  213.             return None
  214.         
  215.         if self._cur.get_content_type() == 'message/delivery-status':
  216.             while True:
  217.                 self._input.push_eof_matcher(NLCRE.match)
  218.                 for retval in self._parsegen():
  219.                     if retval is NeedMoreData:
  220.                         yield NeedMoreData
  221.                         continue
  222.                     
  223.                 
  224.                 msg = self._pop_message()
  225.                 self._input.pop_eof_matcher()
  226.                 while True:
  227.                     line = self._input.readline()
  228.                     if line is NeedMoreData:
  229.                         yield NeedMoreData
  230.                         continue
  231.                     
  232.                     break
  233.                 while True:
  234.                     line = self._input.readline()
  235.                     if line is NeedMoreData:
  236.                         yield NeedMoreData
  237.                         continue
  238.                     
  239.                     break
  240.                 if line == '':
  241.                     break
  242.                 
  243.                 self._input.unreadline(line)
  244.             return None
  245.         
  246.         if self._cur.get_content_maintype() == 'message':
  247.             for retval in self._parsegen():
  248.                 if retval is NeedMoreData:
  249.                     yield NeedMoreData
  250.                     continue
  251.                 
  252.             
  253.             self._pop_message()
  254.             return None
  255.         
  256.         if self._cur.get_content_maintype() == 'multipart':
  257.             boundary = self._cur.get_boundary()
  258.             if boundary is None:
  259.                 self._cur.defects.append(Errors.NoBoundaryInMultipartDefect())
  260.                 lines = []
  261.                 for line in self._input:
  262.                     if line is NeedMoreData:
  263.                         yield NeedMoreData
  264.                         continue
  265.                     
  266.                     lines.append(line)
  267.                 
  268.                 self._cur.set_payload(EMPTYSTRING.join(lines))
  269.                 return None
  270.             
  271.             separator = '--' + boundary
  272.             boundaryre = re.compile('(?P<sep>' + re.escape(separator) + ')(?P<end>--)?(?P<ws>[ \\t]*)(?P<linesep>\\r\\n|\\r|\\n)?$')
  273.             capturing_preamble = True
  274.             preamble = []
  275.             linesep = False
  276.             while True:
  277.                 line = self._input.readline()
  278.                 if line is NeedMoreData:
  279.                     yield NeedMoreData
  280.                     continue
  281.                 
  282.                 if line == '':
  283.                     break
  284.                 
  285.                 mo = boundaryre.match(line)
  286.                 if mo:
  287.                     if mo.group('end'):
  288.                         linesep = mo.group('linesep')
  289.                         break
  290.                     
  291.                     if capturing_preamble:
  292.                         if preamble:
  293.                             lastline = preamble[-1]
  294.                             eolmo = NLCRE_eol.search(lastline)
  295.                             if eolmo:
  296.                                 preamble[-1] = lastline[:-len(eolmo.group(0))]
  297.                             
  298.                             self._cur.preamble = EMPTYSTRING.join(preamble)
  299.                         
  300.                         capturing_preamble = False
  301.                         self._input.unreadline(line)
  302.                         continue
  303.                     
  304.                     while True:
  305.                         line = self._input.readline()
  306.                         if line is NeedMoreData:
  307.                             yield NeedMoreData
  308.                             continue
  309.                         
  310.                         mo = boundaryre.match(line)
  311.                         if not mo:
  312.                             self._input.unreadline(line)
  313.                             break
  314.                             continue
  315.                     self._input.push_eof_matcher(boundaryre.match)
  316.                     for retval in self._parsegen():
  317.                         if retval is NeedMoreData:
  318.                             yield NeedMoreData
  319.                             continue
  320.                         
  321.                     
  322.                     if self._last.get_content_maintype() == 'multipart':
  323.                         epilogue = self._last.epilogue
  324.                         if epilogue == '':
  325.                             self._last.epilogue = None
  326.                         elif epilogue is not None:
  327.                             mo = NLCRE_eol.search(epilogue)
  328.                             if mo:
  329.                                 end = len(mo.group(0))
  330.                                 self._last.epilogue = epilogue[:-end]
  331.                             
  332.                         
  333.                     else:
  334.                         payload = self._last.get_payload()
  335.                         if isinstance(payload, basestring):
  336.                             mo = NLCRE_eol.search(payload)
  337.                             if mo:
  338.                                 payload = payload[:-len(mo.group(0))]
  339.                                 self._last.set_payload(payload)
  340.                             
  341.                         
  342.                     self._input.pop_eof_matcher()
  343.                     self._pop_message()
  344.                     self._last = self._cur
  345.                     continue
  346.                 preamble.append(line)
  347.             if capturing_preamble:
  348.                 self._cur.defects.append(Errors.StartBoundaryNotFoundDefect())
  349.                 self._cur.set_payload(EMPTYSTRING.join(preamble))
  350.                 epilogue = []
  351.                 for line in self._input:
  352.                     if line is NeedMoreData:
  353.                         yield NeedMoreData
  354.                         continue
  355.                         continue
  356.                 
  357.                 self._cur.epilogue = EMPTYSTRING.join(epilogue)
  358.                 return None
  359.             
  360.             if linesep:
  361.                 epilogue = [
  362.                     '']
  363.             else:
  364.                 epilogue = []
  365.             for line in self._input:
  366.                 if line is NeedMoreData:
  367.                     yield NeedMoreData
  368.                     continue
  369.                 
  370.                 epilogue.append(line)
  371.             
  372.             if epilogue:
  373.                 firstline = epilogue[0]
  374.                 bolmo = NLCRE_bol.match(firstline)
  375.                 if bolmo:
  376.                     epilogue[0] = firstline[len(bolmo.group(0)):]
  377.                 
  378.             
  379.             self._cur.epilogue = EMPTYSTRING.join(epilogue)
  380.             return None
  381.         
  382.         lines = []
  383.         for line in self._input:
  384.             if line is NeedMoreData:
  385.                 yield NeedMoreData
  386.                 continue
  387.             
  388.             lines.append(line)
  389.         
  390.         self._cur.set_payload(EMPTYSTRING.join(lines))
  391.  
  392.     
  393.     def _parse_headers(self, lines):
  394.         lastheader = ''
  395.         lastvalue = []
  396.         for lineno, line in enumerate(lines):
  397.             if line[0] in ' \t':
  398.                 if not lastheader:
  399.                     defect = Errors.FirstHeaderLineIsContinuationDefect(line)
  400.                     self._cur.defects.append(defect)
  401.                     continue
  402.                 
  403.                 lastvalue.append(line)
  404.                 continue
  405.             
  406.             if lastheader:
  407.                 lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n')
  408.                 self._cur[lastheader] = lhdr
  409.                 lastheader = ''
  410.                 lastvalue = []
  411.             
  412.             if line.startswith('From '):
  413.                 if lineno == 0:
  414.                     mo = NLCRE_eol.search(line)
  415.                     if mo:
  416.                         line = line[:-len(mo.group(0))]
  417.                     
  418.                     self._cur.set_unixfrom(line)
  419.                     continue
  420.                 elif lineno == len(lines) - 1:
  421.                     self._input.unreadline(line)
  422.                     return None
  423.                 else:
  424.                     defect = Errors.MisplacedEnvelopeHeaderDefect(line)
  425.                     self._cur.defects.append(defect)
  426.             
  427.             i = line.find(':')
  428.             if i < 0:
  429.                 defect = Errors.MalformedHeaderDefect(line)
  430.                 self._cur.defects.append(defect)
  431.                 continue
  432.             
  433.             lastheader = line[:i]
  434.             lastvalue = [
  435.                 line[i + 1:].lstrip()]
  436.         
  437.         if lastheader:
  438.             self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n')
  439.         
  440.  
  441.  
  442.